% Standalone wrapper document: typesets a single pseudocode box on a page
% without headers, footers, or page numbers.
\documentclass[10pt,oneside]{book}

% Shared macro definitions.  Presumably provides the codebox environment and
% the \proc, \dataTrain and \ex macros used in the body -- TODO confirm
% against macros_orig.tex.
\input{macros_orig.tex}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{document}

% Select the empty page style for the document and for the current page.
\pagestyle{empty}
\thispagestyle{empty}

% Pseudocode for SGD-Neural-Net: stochastic gradient descent training of an
% L-layer feed-forward network.
%
% Arguments shown in the procedure header:
%   \dataTrain          training data
%   T                   number of SGD iterations
%   L                   number of layers
%   (m^1, ..., m^L)     per-layer size used to scale the weight initialisation
%   (f^1, ..., f^L)     per-layer activation functions
%
% Outline: (1) draw random initial weights and offsets for every layer;
% (2) for each of T iterations pick one random training example, run the
% forward pass, evaluate the loss, then walk the layers from L down to 1,
% back-propagating the error and applying a gradient step to each layer.
%
% NOTE(review): the step size \eta(t) is used in the updates but is not listed
% among the procedure arguments -- confirm whether it should be.
\begin{codebox}
  \Procname{$\proc{SGD-Neural-Net}(\dataTrain, T, L, (m^1, \ldots,
    m^L), (f^1, \ldots, f^L))$}
  % --- random initialisation of weights and offsets -----------------------
  \li \For $l \gets 1$ \To $L$
      \Do
  \li   $W_{ij}^l \sim \text{Gaussian}(0, 1/m^l)$
  \li   $W_{0j}^l \sim \text{Gaussian}(0, 1)$
      \End
  % --- main SGD loop ------------------------------------------------------
  \li \For $t \gets 1$ \To $T$
  \li \Do
        $i \gets \text{random sample from } \{1,\ldots,n\}$
  \li   $A^0 \gets \ex{x}{i}$
  \li   \Comment forward pass to compute the output $A^L$
  \li   \For $l \gets 1$ \To $L$
  \li   \Do
          $Z^l \gets W^{lT}A^{l-1} + W_0^l$
  \li     $A^l \gets f^l(Z^l)$
        \End
  \li   $\text{loss} \gets \text{Loss}(A^L, \ex{y}{i})$
  % The output layer needs no special case: for l = L the "else" branch
  % below uses d loss / d A^L directly.
  % The layers are visited in decreasing order, hence \Downto rather than \To.
  \li   \For $l \gets L$ \Downto $1$
        \Do
  \li     \Comment error back-propagation
  \li     $\partial \text{loss}/\partial A^l \gets$ \textbf{if} $l < L$ \textbf{then}
            $\partial \text{loss}/\partial Z^{l+1} \cdot
            \partial Z^{l+1}/\partial A^l$ \textbf{else} $\partial \text{loss}/\partial A^L$
  \li     $\partial \text{loss}/\partial Z^l \gets
            \partial \text{loss}/\partial A^l \cdot \partial A^l/\partial Z^l$
  \li     \Comment compute gradient with respect to weights
  \li     $\partial \text{loss}/\partial W^l \gets
            \partial \text{loss}/\partial Z^l \cdot \partial Z^l/\partial W^l$
  \li     $\partial \text{loss}/\partial W_0^l \gets
            \partial \text{loss}/\partial Z^l \cdot \partial Z^l/\partial W_0^l$
  \li     \Comment stochastic gradient descent update
  \li     $W^l \gets W^l - \eta(t) \cdot \partial \text{loss}/\partial W^l$
  \li     $W_0^l \gets W_0^l - \eta(t) \cdot \partial \text{loss}/\partial W_0^l$
        \End
      % close the body of the outer loop over t
      \End
\end{codebox}

\end{document}
